{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Chapter 16: Bayesian statistics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Robert Johansson\n",
    "\n",
    "Source code listings for [Numerical Python - Scientific Computing and Data Science Applications with Numpy, SciPy and Matplotlib](https://www.apress.com/us/book/9781484242452) (ISBN 978-1-484242-45-2)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "import pymc as mc "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "%config InlineBackend.figure_format='retina'\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "sns.set()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import statsmodels.formula.api as smf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Changelog:\n",
    "\n",
    "* 160828: The keyword argument `vars` to the functions `mc.traceplot` and `mc.forestplot` was changed to `varnames`.\n",
    "* 231125: The keyword `varnames` was changed to `var_names`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Simple example: Normal distributed random variable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# try this\n",
    "# dir(mc.distributions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mu = 4.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sigma = 2.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = mc.Model()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with model:\n",
    "    mc.Normal('X', mu, tau=1/sigma**2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "model.continuous_value_vars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "start = dict(X=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with model:\n",
    "    step = mc.Metropolis()\n",
    "    trace = mc.sample(10000, step=step, start=start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.linspace(-4, 12, 1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "y = stats.norm(mu, sigma).pdf(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import arviz as az"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# az.extract(trace)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_values(trace, variable):\n",
    "    return trace.posterior.stack(sample=['chain', 'draw'])[variable].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = get_values(trace, \"X\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(8, 3))\n",
    "\n",
    "ax.plot(x, y, 'r', lw=2)\n",
    "sns.histplot(X, ax=ax, kde=True, stat='density')\n",
    "ax.set_xlim(-4, 12)\n",
    "ax.set_xlabel(\"x\")\n",
    "ax.set_ylabel(\"Probability distribution\")\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-normal-distribution-sampled.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(1, 2, figsize=(12, 4), squeeze=False)\n",
    "mc.plot_trace(trace, axes=axes)\n",
    "axes[0,0].plot(x, y, 'r', lw=0.5)\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-normal-sampling-trace.png\")\n",
    "fig.savefig(\"ch16-normal-sampling-trace.pdf\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dependent random variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = mc.Model()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#mc.HalfNormal??"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with model:\n",
    "    mean = mc.Normal('mean', 3.0)\n",
    "    sigma = mc.HalfNormal('sigma', sigma=1.0)\n",
    "    X = mc.Normal('X', mean, tau=sigma)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "model.continuous_value_vars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with model:\n",
    "    start = mc.find_MAP()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "start"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with model:\n",
    "    step = mc.Metropolis()\n",
    "    trace = mc.sample(10000, start=start, step=step)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "get_values(trace, \"sigma\").mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# trace.get_values('sigma').mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = get_values(trace, \"X\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# X = trace.get_values('X')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "X.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "X.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(3, 2, figsize=(8, 6), squeeze=False)\n",
    "mc.plot_trace(trace, var_names=['mean', 'sigma', 'X'], axes=axes)\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-dependent-rv-sample-trace.png\")\n",
    "fig.savefig(\"ch16-dependent-rv-sample-trace.pdf\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Posterior distributions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mu = 2.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "s = 1.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "data = stats.norm(mu, s).rvs(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with mc.Model() as model:\n",
    "    \n",
    "    mean = mc.Normal('mean', 4.0, tau=1.0) # true 2.5\n",
    "    sigma = mc.HalfNormal('sigma', tau=3.0 * np.sqrt(np.pi/2)) # true 1.5\n",
    "\n",
    "    X = mc.Normal('X', mean, tau=1/sigma**2, observed=data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "model.continuous_value_vars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with model:\n",
    "    start = mc.find_MAP()\n",
    "    step = mc.Metropolis()\n",
    "    trace = mc.sample(10000, start=start, step=step)\n",
    "    #step = mc.NUTS()\n",
    "    #trace = mc.sample(10000, start=start, step=step)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "start"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "model.continuous_value_vars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(2, 2, figsize=(8, 4), squeeze=False)\n",
    "mc.plot_trace(trace, var_names=['mean', 'sigma'], axes=axes)\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-posterior-sample-trace.png\")\n",
    "fig.savefig(\"ch16-posterior-sample-trace.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# trace.posterior.stack(sample=['chain', 'draw'])[\"mean\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "mu, get_values(trace, \"mean\").mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "s, get_values(trace, \"sigma\").mean() # trace.posterior.stack(sample=['chain', 'draw'])[\"sigma\"].values.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "gs = mc.plot_forest(trace, var_names=['mean', 'sigma'])\n",
    "plt.savefig(\"ch16-forestplot.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "# help(mc.summary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "mc.summary(trace, var_names=['mean', 'sigma'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "dataset = sm.datasets.get_rdataset(\"Davis\", \"carData\", cache=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "data = dataset.data[dataset.data.sex == 'M']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data[data.weight < 110]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "data.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "model = smf.ols(\"height ~ weight\", data=data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "result = model.fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "print(result.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.linspace(50, 110, 25)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = result.predict({\"weight\": x})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(1, 1, figsize=(8, 3))\n",
    "ax.plot(data.weight, data.height, 'o')\n",
    "ax.plot(x, y, color=\"blue\")\n",
    "ax.set_xlabel(\"weight\")\n",
    "ax.set_ylabel(\"height\")\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-linear-ols-fit.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with mc.Model() as model:\n",
    "    sigma = mc.Uniform('sigma', 0, 10)\n",
    "    intercept = mc.Normal('intercept', 125, sigma=30)\n",
    "    beta = mc.Normal('beta', 0, sigma=5)\n",
    "    \n",
    "    height_mu = intercept + beta * data.weight\n",
    "\n",
    "    # likelihood function\n",
    "    mc.Normal('height', mu=height_mu, sigma=sigma, observed=data.height)\n",
    "\n",
    "    # predict\n",
    "    predict_height = mc.Normal('predict_height', mu=intercept + beta * x, sigma=sigma, shape=len(x)) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "model.continuous_value_vars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# help(mc.NUTS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#mc.sample?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with model:\n",
    "    # start = mc.find_MAP()\n",
    "    step = mc.NUTS()\n",
    "    trace = mc.sample(10000, step=step) # , start=start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "model.continuous_value_vars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(2, 2, figsize=(8, 4), squeeze=False)\n",
    "mc.plot_trace(trace, var_names=['intercept', 'beta'], axes=axes)\n",
    "fig.savefig(\"ch16-linear-model-sample-trace.pdf\")\n",
    "fig.savefig(\"ch16-linear-model-sample-trace.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "intercept = get_values(trace, \"intercept\").mean()\n",
    "intercept"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "beta = get_values(trace, \"beta\").mean() # trace.posterior.stack(sample=['chain', 'draw'])[\"beta\"].values.mean()\n",
    "beta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "result.params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "result.predict({\"weight\": 90})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "weight_index = np.where(x == 90)[0][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# trace.posterior.stack(sample=['chain', 'draw'])[\"predict_height\"].values[weight_index, :].mean()\n",
    "get_values(trace, \"predict_height\")[weight_index, :].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "#trace.get_values(\"predict_height\")[:, weight_index].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# trace.posterior.stack(sample=['chain', 'draw'])[\"predict_height\"].values.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(8, 3))\n",
    "\n",
    "sns.histplot(get_values(trace, \"predict_height\")[weight_index, :], ax=ax, kde=True, stat='density')\n",
    "ax.set_xlim(150, 210)\n",
    "ax.set_xlabel(\"height\")\n",
    "ax.set_ylabel(\"Probability distribution\")\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-linear-model-predict-cut.pdf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# trace.posterior.stack(sample=['chain', 'draw'])[\"intercept\"].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(1, 1, figsize=(12, 4))\n",
    "\n",
    "for n in range(500, 2000, 1):\n",
    "    intercept = get_values(trace, \"intercept\")[n]\n",
    "    beta = get_values(trace, \"beta\")[n]\n",
    "    ax.plot(x, intercept + beta * x, color='red', lw=0.25, alpha=0.05)\n",
    "\n",
    "intercept = get_values(trace, \"intercept\").mean()\n",
    "beta = get_values(trace, \"beta\").mean()\n",
    "ax.plot(x, intercept + beta * x, color='k', label=\"Mean Bayesian prediction\")\n",
    "\n",
    "ax.plot(data.weight, data.height, 'o')\n",
    "ax.plot(x, y, '--', color=\"blue\", label=\"OLS prediction\")\n",
    "ax.set_xlabel(\"weight\")\n",
    "ax.set_ylabel(\"height\")\n",
    "ax.legend(loc=0)\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-linear-model-fit.pdf\")\n",
    "fig.savefig(\"ch16-linear-model-fit.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import bambi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = bambi.Model('height ~ weight', data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model.fit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trace = model.fit(draws=2000) #  chains=4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "#with mc.Model() as model:\n",
    "#    mc.glm.GLM.from_formula('height ~ weight', data)\n",
    "#    step = mc.NUTS()\n",
    "#    trace = mc.sample(2000, step)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# trace"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(3, 2, figsize=(8, 6), squeeze=False)\n",
    "mc.plot_trace(trace, var_names=['Intercept', 'weight', 'height_sigma'], axes=axes)\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-glm-sample-trace.pdf\")\n",
    "fig.savefig(\"ch16-glm-sample-trace.png\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multilevel model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = sm.datasets.get_rdataset(\"Davis\", \"carData\", cache=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = dataset.data.copy()\n",
    "data = data[data.weight < 110]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "data[\"sex\"] = data[\"sex\"].apply(lambda x: 1 if x == \"F\" else 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with mc.Model() as model:\n",
    "\n",
    "    # heirarchical model: hyper priors\n",
    "    #intercept_mu = mc.Normal(\"intercept_mu\", 125)\n",
    "    #intercept_sigma = 30.0 #mc.Uniform('intercept_sigma', lower=0, upper=50)\n",
    "    #beta_mu = mc.Normal(\"beta_mu\", 0.0)\n",
    "    #beta_sigma = 5.0 #mc.Uniform('beta_sigma', lower=0, upper=10)\n",
    "    \n",
    "    # multilevel model: prior parameters\n",
    "    intercept_mu, intercept_sigma = 125, 30\n",
    "    beta_mu, beta_sigma = 0.0, 5.0\n",
    "    \n",
    "    # priors\n",
    "    intercept = mc.Normal('intercept', intercept_mu, sigma=intercept_sigma, shape=2)\n",
    "    beta = mc.Normal('beta', beta_mu, sigma=beta_sigma, shape=2)\n",
    "    error = mc.Uniform('error', 0, 10)\n",
    "\n",
    "    # model equation\n",
    "    sex_idx = data.sex.values\n",
    "    height_mu = intercept[sex_idx] + beta[sex_idx] * data.weight\n",
    "\n",
    "    mc.Normal('height', mu=height_mu, sigma=error, observed=data.height)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "model.continuous_value_vars"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "with model:\n",
    "    start = mc.find_MAP()\n",
    "    # hessian = mc.find_hessian(start)\n",
    "    step = mc.NUTS()\n",
    "    trace = mc.sample(5000, step=step, start=start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(3, 2, figsize=(8, 6), squeeze=False)\n",
    "mc.plot_trace(trace, var_names=['intercept', 'beta', 'error'], axes=axes)\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-multilevel-sample-trace.pdf\")\n",
    "fig.savefig(\"ch16-multilevel-sample-trace.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "intercept_m, intercept_f = get_values(trace, 'intercept').mean(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "intercept = get_values(trace, 'intercept').mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "get_values(trace, 'beta')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "beta_m, beta_f = get_values(trace, 'beta').mean(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "beta = get_values(trace, 'beta').mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(1, 1, figsize=(8, 3))\n",
    "\n",
    "mask_m = data.sex == 0\n",
    "mask_f = data.sex == 1\n",
    "\n",
    "ax.plot(data.weight[mask_m], data.height[mask_m], 'o', color=\"steelblue\", label=\"male\", alpha=0.5)\n",
    "ax.plot(data.weight[mask_f], data.height[mask_f], 'o', color=\"green\", label=\"female\", alpha=0.5)\n",
    "\n",
    "x = np.linspace(35, 110, 50)\n",
    "ax.plot(x, intercept_m + x * beta_m, color=\"steelblue\", label=\"model male group\")\n",
    "ax.plot(x, intercept_f + x * beta_f, color=\"green\", label=\"model female group\")\n",
    "ax.plot(x, intercept + x * beta, color=\"black\", label=\"model both groups\")\n",
    "\n",
    "ax.set_xlabel(\"weight\")\n",
    "ax.set_ylabel(\"height\")\n",
    "ax.legend(loc=0)\n",
    "fig.tight_layout()\n",
    "fig.savefig(\"ch16-multilevel-linear-model-fit.pdf\")\n",
    "fig.savefig(\"ch16-multilevel-linear-model-fit.png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "get_values(trace, 'error').mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%reload_ext version_information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "%version_information numpy, pandas, matplotlib, statsmodels, pymc3, theano"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
